Análise Descritiva - GeoLV

Distribuição dos Indicadores

Vamos começar carregando o banco de dados.

library(tidyverse)
library(knitr)
library(plotly)
# Load the dataset used by every chunk below from the serialized RDS file
# located in the working directory.
data <- read_rds(file = "banco.rds")

Após, uma análise descritiva das variáveis.

# Histogram of the `dispersion` indicator over all records.
# `bins` is stated explicitly (30 is the value stat_bin() falls back to when
# none is given) so the binning choice is visible in the code and the
# "Pick better value with `binwidth`" message is no longer emitted.
data %>%
  ggplot(mapping = aes(x = dispersion)) +
  geom_histogram(bins = 30) +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 122 rows containing non-finite values (stat_bin).

# Descriptive statistics of `dispersion` (quartiles, mean and standard
# deviation), computed with missing values removed and rendered as a table
# through kable().
# `TRUE` is spelled out instead of `T`, which is an ordinary variable that can
# be reassigned. The accented column name and caption are written as proper
# UTF-8 text (they were stored as mis-encoded byte sequences).
data %>%
  summarise(
    `1º Quartil` = round(quantile(dispersion, 0.25, na.rm = TRUE), 6),
    `Mediana` = round(quantile(dispersion, 0.50, na.rm = TRUE), 6),
    `3º Quartil` = round(quantile(dispersion, 0.75, na.rm = TRUE), 6),
    `Média` = round(mean(dispersion, na.rm = TRUE), 6),
    `Desvio Padrão` = sd(dispersion, na.rm = TRUE)
  ) %>%
  kable(caption = "Estatística descritiva da dispersão")
Estatística descritiva da dispersão
1º Quartil Mediana 3º Quartil Média Desvio Padrão
1.4e-05 0.000104 0.002448 0.008622 0.0322126
# Bar chart with the number of records for each `clusters_count` value;
# x-axis ticks are fixed at the integers 1 through 6.
# theme_minimal() is applied so this chart matches the other plots of the
# report, all of which use that theme.
data %>%
  ggplot(mapping = aes(x = clusters_count)) +
  geom_bar() +
  theme_minimal() +
  scale_x_continuous(breaks = seq(1, 6, by = 1))
## Warning: Removed 122 rows containing non-finite values (stat_count).

# Bar chart with the number of records for each `providers_count` value;
# x-axis ticks are fixed at 1, 2 and 3.
ggplot(data = data, mapping = aes(x = providers_count)) +
  theme_minimal() +
  scale_x_continuous(breaks = seq(from = 1, to = 3, by = 1)) +
  geom_bar()
## Warning: Removed 122 rows containing non-finite values (stat_count).

Distribuição dos Indicadores por Município

# One boxplot of `dispersion` per `NM_LOCALIDADE` value.
# The x-axis labels are rotated 50 degrees and right-aligned; the theme()
# tweak comes after theme_minimal() so the complete theme does not reset it.
ggplot(data = data, mapping = aes(x = NM_LOCALIDADE, y = dispersion)) +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 50))
## Warning: Removed 122 rows containing non-finite values (stat_boxplot).

# Same per-locality boxplot of `dispersion`, zoomed to the 0-0.2 range of the
# y axis.
# coord_cartesian() is used instead of ylim(): ylim() sets scale limits, which
# discards observations outside the range *before* the boxplot statistics are
# computed and therefore changes the boxes themselves. coord_cartesian() only
# restricts the visible window, leaving quartiles and whiskers based on all
# the data.
data %>%
  ggplot(mapping = aes(x = NM_LOCALIDADE, y = dispersion)) +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 50, hjust = 1)) +
  coord_cartesian(ylim = c(0, 0.2))
## Warning: Removed 124 rows containing non-finite values (stat_boxplot).

# Lollipop chart: for each `NM_LOCALIDADE`, the percentage deviation of its
# mean `clusters_count` from the average of the per-locality means.
data %>%
  group_by(NM_LOCALIDADE) %>%
  summarise(MEDIA = mean(clusters_count, na.rm = TRUE)) %>%
  mutate(
    # na.rm = TRUE: a locality whose values are all missing gets a NaN mean,
    # which would otherwise propagate to the overall mean and turn every
    # deviation into NaN.
    MEDIA_GERAL = mean(MEDIA, na.rm = TRUE),
    DESVIO_MEDIA = ((MEDIA - MEDIA_GERAL) / MEDIA_GERAL) * 100
  ) %>%
  ggplot(mapping = aes(x = NM_LOCALIDADE, y = DESVIO_MEDIA)) +
  geom_segment(
    aes(xend = NM_LOCALIDADE, yend = 0, color = DESVIO_MEDIA > 0),
    alpha = 0.8,
    size = 2
  ) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0) +
  coord_flip() +
  # Labels are tied to explicit breaks so that each label always matches its
  # logical level, even when only one of the two levels occurs in the data.
  scale_color_discrete(
    breaks = c("FALSE", "TRUE"),
    labels = c("Negativo", "Positivo")
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  # Accented labels are written as proper UTF-8 text (they were stored as
  # mis-encoded byte sequences).
  labs(
    y = "Desvio Percentual em Relação à Média (%)",
    x = "Municípios",
    color = "Direção do Desvio",
    title = "Diferença de Quantidade de Clusters"
  )

# Lollipop chart: for each `NM_LOCALIDADE`, the percentage deviation of its
# mean `providers_count` from the average of the per-locality means.
data %>%
  group_by(NM_LOCALIDADE) %>%
  summarise(MEDIA = mean(providers_count, na.rm = TRUE)) %>%
  mutate(
    # na.rm = TRUE: a locality whose values are all missing gets a NaN mean,
    # which would otherwise propagate to the overall mean and turn every
    # deviation into NaN.
    MEDIA_GERAL = mean(MEDIA, na.rm = TRUE),
    DESVIO_MEDIA = ((MEDIA - MEDIA_GERAL) / MEDIA_GERAL) * 100
  ) %>%
  ggplot(mapping = aes(x = NM_LOCALIDADE, y = DESVIO_MEDIA)) +
  geom_segment(
    aes(xend = NM_LOCALIDADE, yend = 0, color = DESVIO_MEDIA > 0),
    alpha = 0.8,
    size = 2
  ) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0) +
  coord_flip() +
  # Labels are tied to explicit breaks so that each label always matches its
  # logical level, even when only one of the two levels occurs in the data.
  scale_color_discrete(
    breaks = c("FALSE", "TRUE"),
    labels = c("Negativo", "Positivo")
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  # Accented labels are written as proper UTF-8 text (they were stored as
  # mis-encoded byte sequences).
  labs(
    y = "Desvio Percentual em Relação à Média (%)",
    x = "Municípios",
    color = "Direção do Desvio",
    title = "Diferença de Quantidade de Provedores"
  )

# Scatter plot of `clusters_count` against `providers_count`, colored by the
# natural log of `dispersion`.
# Only the jittered layer is drawn: stacking geom_point() and geom_jitter()
# plotted every observation twice (once at its exact position and once
# displaced), which also produced a duplicated missing-values warning.
data %>%
  ggplot(
    mapping = aes(
      x = clusters_count,
      y = providers_count,
      color = log(dispersion)
    )
  ) +
  geom_jitter()
## Warning: Removed 122 rows containing missing values (geom_point).

## Warning: Removed 122 rows containing missing values (geom_point).

# Convert the most recently created ggplot into an interactive plotly widget
# (the plot is passed explicitly instead of relying on the default argument).
ggplotly(p = last_plot())